import os
import pandas as pd
import json
from math import floor
from synthesis.synthesis import make_stimuli
from synthesis.settings import SETTINGS
from random import uniform, sample

# S3 bucket the generated stimuli are moved to
BUCKET_NAME = 'emo-validation'

# Switches for the two stages of this script
UPLOAD_TO_S3 = True
CREATE_STIMULI = True

# Output folder and %-style filename templates
STIMULI_FOLDER = '../validation_stimuli/'
SENTENCE_BASENAME = 'Harvard_L35_S0%d_0.wav'
ITERATION0_BASENAME = 'iteration0_sentence%d.wav'
RANDOM_SAMPLE_BASENAME = 'random_list%d_sentence%d.wav'

# Sampling design
NUM_STIMULI_ALL_ITERATIONS = 20  # so each iteration two times
EMOTIONS = ['ANG', 'SAD', 'HAP']
SENTENCES = [1, 2, 3]
ITERATIONS = list(range(1, 21))  # Careful this changed to 20 iterations
NUM_LISTS = 150  # 150 lists

# CSV exports the stimuli are drawn from
DF_WITHIN_PATH = '../../../data/within/gibbs_experiment/export_CORRECTED.csv'
DF_ACROSS_PATH = '../../../data/across/complete_networks.csv'

if UPLOAD_TO_S3:
    # Empty the bucket first so that nothing uploaded by an earlier run remains
    clear_status = os.system('aws s3 rm s3://%s --recursive' % BUCKET_NAME)
    if clear_status != 0:
        # os.system never raises on failure; report it instead of silently
        # continuing, but keep going as the script always did.
        print('WARNING: clearing s3://%s failed (status %d)' % (BUCKET_NAME, clear_status))

if CREATE_STIMULI:
    # Create the output directories; exist_ok makes this a no-op when they
    # are already there, without a separate existence check.
    os.makedirs(STIMULI_FOLDER, exist_ok=True)
    os.makedirs('stimuli_lists', exist_ok=True)

    def read_df(path):
        """Load a CSV file and normalise its ``emotion`` column.

        Every emotion label is upper-cased and cut down to its first three
        characters (for example ``'angry'`` becomes ``'ANG'``).

        :param path: source handed to ``pandas.read_csv``
        :return: the loaded data frame with the rewritten ``emotion`` column
        """
        raw = pd.read_csv(path)
        emotion_codes = [label.upper()[:3] for label in raw.emotion]
        return raw.assign(emotion=emotion_codes)

    # Load the corrected within-experiment export and the across-experiment
    # export, shuffling the rows of each (within first, then across)
    df_within = read_df(DF_WITHIN_PATH).sample(frac=1)
    df_across = read_df(DF_ACROSS_PATH).sample(frac=1)

    # Dict handed to make_stimuli; its 'file' entry is set before every call
    chain_definition = {}

    # Collects the parameters of the randomly generated stimuli
    df_random_samples = pd.DataFrame()

    def shuffle_from_available(visited_values):
        """Pick one of the least-visited keys at random and count the visit.

        :param visited_values: dict mapping each candidate to how often it has
            been picked so far; the count of the picked key is incremented
            in place
        :return: the picked key
        """
        fewest_visits = min(visited_values.values())
        candidates = [key for key, visits in visited_values.items() if visits == fewest_visits]
        chosen = sample(candidates, 1)[0]
        visited_values[chosen] += 1
        return chosen


    # Iteration 0: synthesize every sentence once from the initial values
    vector = SETTINGS['INITIAL_VALUES']
    for sentence in SENTENCES:
        source_file = SENTENCE_BASENAME % sentence
        output_path = STIMULI_FOLDER + (ITERATION0_BASENAME % sentence)
        chain_definition['file'] = source_file
        make_stimuli(vector, output_path, chain_definition, SETTINGS)


    def _has_selectable_rows(df):
        """Return True if *df* still holds a row the balanced sampler can pick.

        A row is selectable when its emotion is in ``EMOTIONS`` and its
        iteration is in ``ITERATIONS``.
        """
        return bool((df.emotion.isin(EMOTIONS) & df.iteration.isin(ITERATIONS)).any())


    def _sample_balanced_stimuli(df, df_path, prefix, target_length, files_in_split,
                                 visited_emotions, visited_iterations):
        """Draw rows from *df* and synthesize them until the list is long enough.

        An (emotion, iteration) pair is chosen with ``shuffle_from_available``
        and the first remaining row matching it is synthesized, recorded in
        *files_in_split* and removed from the pool.

        :param df: shuffled pool of rows still available for sampling
        :param df_path: CSV the pool is re-read from once it is used up
        :param prefix: string put in front of the row's filename
        :param target_length: stop once *files_in_split* has this many entries
        :param files_in_split: filenames of the current list (extended in place)
        :param visited_emotions: pick counts per emotion (updated in place)
        :param visited_iterations: pick counts per iteration (updated in place)
        :return: the pool with the used rows removed (possibly freshly re-read)
        :raises ValueError: if the CSV holds no row matching EMOTIONS/ITERATIONS
        """
        while len(files_in_split) < target_length:
            # NOTE: both counters advance even when the chosen pair has no row
            # left, exactly as in the original sampling procedure.
            selected_emotion = shuffle_from_available(visited_emotions)
            selected_iteration = shuffle_from_available(visited_iterations)
            rows = df[(df.emotion == selected_emotion) & (df.iteration == selected_iteration)]
            if rows.empty:
                # Refill once nothing selectable is left. Checking only for a
                # completely empty frame would loop forever if the pool still
                # held rows outside EMOTIONS/ITERATIONS.
                if not _has_selectable_rows(df):
                    df = read_df(df_path).sample(frac=1)
                    if not _has_selectable_rows(df):
                        raise ValueError(
                            'No rows in %s match the configured emotions and iterations' % df_path)
                continue

            row = rows.iloc[0]

            # Append filename to list
            filename = prefix + row['filename']
            files_in_split.append(filename)

            # do synthesis
            vector = row[SETTINGS['DIMENSION_NAMES']].values
            chain_definition['file'] = SENTENCE_BASENAME % row.sentence
            make_stimuli(vector, STIMULI_FOLDER + filename, chain_definition, SETTINGS)

            # Remove the used row from the pool (row.name is its index label)
            df = df.drop(row.name)
        return df


    # Parameters of all random stimuli; turned into a data frame once at the
    # end, because DataFrame.append no longer exists in pandas 2.x and copied
    # the whole frame on every call.
    random_sample_rows = []

    for i in range(NUM_LISTS):
        files_in_split = []
        # The counters are shared by the within and across sampling below
        visited_emotions = dict.fromkeys(EMOTIONS, 0)
        visited_iterations = dict.fromkeys(ITERATIONS, 0)

        # SAMPLE FROM WITHIN (20x)
        df_within = _sample_balanced_stimuli(
            df_within, DF_WITHIN_PATH, 'within_', NUM_STIMULI_ALL_ITERATIONS,
            files_in_split, visited_emotions, visited_iterations)

        # SAMPLE FROM ACROSS (20x)
        df_across = _sample_balanced_stimuli(
            df_across, DF_ACROSS_PATH, 'across_', NUM_STIMULI_ALL_ITERATIONS * 2,
            files_in_split, visited_emotions, visited_iterations)

        # add 3 random samples (3x), one per sentence
        for sentence in SENTENCES:
            vector = [uniform(r[0], r[1]) for r in SETTINGS['RANGES']]
            chain_definition['file'] = SENTENCE_BASENAME % sentence
            file_in_split = RANDOM_SAMPLE_BASENAME % (i, sentence)
            output_path = STIMULI_FOLDER + file_in_split
            make_stimuli(vector, output_path, chain_definition, SETTINGS)

            random_sample_row = dict(zip(SETTINGS['DIMENSION_NAMES'], vector))
            random_sample_row['filename'] = file_in_split
            random_sample_rows.append(random_sample_row)

            files_in_split.append(file_in_split)

        # add 1 iteration 0, randomly pick a sentence (1)
        files_in_split.append(ITERATION0_BASENAME % sample(SENTENCES, 1)[0])

        with open('stimuli_lists/%d.json' % i, 'w') as outfile:
            json.dump(files_in_split, outfile)

        print('Finished %d' % i)

    # Columns follow the key order of the row dicts: the dimension names
    # followed by 'filename'.
    df_random_samples = pd.DataFrame(random_sample_rows)
    df_random_samples.to_csv('stimuli_lists/random_samples_df.csv')

if UPLOAD_TO_S3:
    # Move the generated stimuli from the local folder into the bucket
    upload_status = os.system('aws s3 mv %s s3://%s --recursive' % (STIMULI_FOLDER, BUCKET_NAME))
    if upload_status != 0:
        # os.system never raises on failure; make a failed upload visible
        # instead of finishing silently.
        print('WARNING: moving %s to s3://%s failed (status %d)'
              % (STIMULI_FOLDER, BUCKET_NAME, upload_status))
